Prerequisites
Load required packages
library(tidyverse)
library(ggplot2)
library(rtweet)
library(readr)
Dataset
Import processed data, which can be found here.
#read preprocessed data
wines <- read.csv(file = '../data/processed_data/wines.csv')
Get sample of dataset
#set seed value to birthday of Ricardo Rodriguez, American wrestler and ring announcer and Dr. Reinaldo (Rei) Sanchez-Arias
set.seed(19630217)
#set percentage to test with for simplicity
percentage <- 5
wine_sample<- sample_n(wines, percentage/100*nrow(wines))
Split Taster data into different Data Frame
tasters <- wines %>%
select(taster_name, taster_twitter_handle) %>% unique()
tasters
Drop taster_twitter_handle in wines dataframe
wines <- wines %>%
select(-taster_twitter_handle)
head(wines)
Add Rating Classification
Add following classification to wine dataset as found on the website:
| Classic |
98-100 |
The pinnacle of quality. |
| Superb |
94-97 |
A great achievement. |
| Excellent |
90-93 |
Highly recommended. |
| Very Good |
87-89 |
Often good value; well recommended. |
| Good |
83-86 |
Suitable for everyday consumption; often good value. |
| Acceptable |
80-82 |
Can be employed in casual, less-critical circumstances |
# function to add rating
rating_category <- function(points){
if(points>=98){
return("Classic")
}
else if (points>=94){
return("Superb")
}
else if(points>=90){
return("Excellent")
}
else if(points>=87){
return("Very Good")
}
else if(points>=83){
return("Good")
}
else{
return("Acceptable")
}
}
wines<- wines %>%
rowwise() %>%
mutate(rating_category = rating_category(points))
head(wines)
Explore the Data
EDA (correlation priceXpoints, with DataExplorer library? using (this)[https://datascienceplus.com/blazing-fast-eda-in-r-with-dataexplorer/])
wines %>%
ggplot() +
geom_point(mapping = (aes(x = points, y = price)), na.rm = T)
wines %>%
summarize(avg_price = mean(price, na.rm=TRUE),
sd_price = sd(price, na.rm=TRUE),
lowest_price = min(price, na.rm=TRUE),
highest_price = max(price,na.rm=TRUE))
wines %>%
summarize(avg_points = mean(points, na.rm=TRUE),
sd_points = sd(points, na.rm=TRUE),
lowest_points = min(points, na.rm=TRUE),
highest_points = max(points,na.rm=TRUE))
Select the provinces based on points and Select the best province for wine based on the average points of the sample size.
#find the average number of points across the 1,000 samples
wine_per_province <- wine %>%
select(province, points) %>%
summarise(points = mean(points))
wine_per_province
#Find the best province for wine using the average points across the 1,000 samples #drop the descriptions or just select price? set points to max(points)
best_province <- wine_sample %>%
group_by(province, points) %>%
filter(points > 88.669)
best_province
Rating distribution
Best wine, by variety
#wine_best_variety <-
wines %>%
group_by(variety) %>%
summarise(mean_points = mean(points)) %>%
arrange(desc(mean_points))
user_price <- readline(prompt = "How much are you willing to spend on a bottle?")
user_price <- as.integer(user_price)
wines %>%
filter(price <= user_price) %>%
arrange(desc(points)) %>%
select(title, price, points)
Conclusion
LS0tCnRpdGxlOiAiRXhwbG9yaW5nIGFuZCBBbmFseWl6aW5nIFdpbmUgRW50aHVzaWFzdCBSZXZpZXdzIgpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sKLS0tCgojIFByZXJlcXVpc2l0ZXMKCkxvYWQgcmVxdWlyZWQgcGFja2FnZXMKYGBge3IsIG1lc3NhZ2U9RkFMU0UsIHdhcm5pbmc9RkFMU0V9CmxpYnJhcnkodGlkeXZlcnNlKQpsaWJyYXJ5KGdncGxvdDIpCmxpYnJhcnkocnR3ZWV0KQpsaWJyYXJ5KHJlYWRyKQpgYGAKCiMgRGF0YXNldAoKSW1wb3J0IHByb2Nlc3NlZCBkYXRhLCB3aGljaCBjYW4gYmUgZm91bmQgW2hlcmVdKGh0dHBzOi8vZ2l0aHViLmNvbS9DNHJieW4zbTRuL3dpbmVfcmV2aWV3c19kYXRhX2FuYWx5c2lzL2Jsb2IvbWFzdGVyL2RhdGEvcHJvY2Vzc2VkX2RhdGEvcHJlcHJvY2Vzc2luZy5ybWQpLgoKYGBge3J9CiNyZWFkIHByZXByb2Nlc3NlZCBkYXRhCndpbmVzIDwtIHJlYWQuY3N2KGZpbGUgPSAnLi4vZGF0YS9wcm9jZXNzZWRfZGF0YS93aW5lcy5jc3YnKQpgYGAKCkdldCBzYW1wbGUgb2YgZGF0YXNldApgYGB7cn0KI3NldCBzZWVkIHZhbHVlIHRvIGJpcnRoZGF5IG9mIFJpY2FyZG8gUm9kcmlndWV6LCBBbWVyaWNhbiB3cmVzdGxlciBhbmQgcmluZyBhbm5vdW5jZXIgYW5kIERyLiBSZWluYWxkbyAoUmVpKSBTYW5jaGV6LUFyaWFzCnNldC5zZWVkKDE5NjMwMjE3KQoKI3NldCBwZXJjZW50YWdlIHRvIHRlc3Qgd2l0aCBmb3Igc2ltcGxpY2l0eQpwZXJjZW50YWdlIDwtIDUKd2luZV9zYW1wbGU8LSBzYW1wbGVfbih3aW5lcywgcGVyY2VudGFnZS8xMDAqbnJvdyh3aW5lcykpCmBgYAoKIyMjIFNwbGl0IFRhc3RlciBkYXRhIGludG8gZGlmZmVyZW50IERhdGEgRnJhbWUKCmBgYHtyfQp0YXN0ZXJzIDwtIHdpbmVzICU+JQogIHNlbGVjdCh0YXN0ZXJfbmFtZSwgdGFzdGVyX3R3aXR0ZXJfaGFuZGxlKSAlPiUgdW5pcXVlKCkKdGFzdGVycwpgYGAKCkRyb3AgYHRhc3Rlcl90d2l0dGVyX2hhbmRsZWAgaW4gd2luZXMgZGF0YWZyYW1lCgpgYGB7cn0Kd2luZXMgPC0gd2luZXMgJT4lCiAgc2VsZWN0KC10YXN0ZXJfdHdpdHRlcl9oYW5kbGUpCmhlYWQod2luZXMpCmBgYAoKIyMjIEFkZCBSYXRpbmcgQ2xhc3NpZmljYXRpb24KCkFkZCBmb2xsb3dpbmcgY2xhc3NpZmljYXRpb24gdG8gd2luZSBkYXRhc2V0IGFzIGZvdW5kIG9uIHRoZSBbd2Vic2l0ZV0oaHR0cHM6Ly93d3cud2luZW1hZy5jb20vMjAxMC8wNC8wOS95b3UtYXNrZWQtaG93LWlzLWEtd2luZXMtc2NvcmUtZGV0ZXJtaW5lZC8pOgoKfENhdGVnb3J5ICB8IFJhdGluZyAgfCBEZXNjcmlwdGlvbiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgfAp8LS0tLS0tLS0tLXwtLS0tLS0tLS18LS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS18CnxDbGFzc2ljICAgfAk5OC0xMDAgfCBUaGUgcGlubmFjbGUgb2YgcXVhbGl0eS4gICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgfAp8U3VwZXJiICAgIHwJOTQtOTcJIHwgQSBncmVhdCBhY2hpZXZlbWVudC4gICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIHwKfEV4Y2VsbGVudCB8CTkwLTkzCSB8IEhpZ2hseSByZWNvbW1lbmRlZC4gICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICB8CnxWZXJ5IEdvb2QgfCAgODctODkJIHwgT2Z0ZW4gZ29vZCB2YWx1ZTsgd2VsbCByZWNvbW1lbmRlZC4gICAgICAgICAgICAgICAgICAgIHwKfEdvb2QJICAgICB8ICA4My04NgkgfCBTdWl0YWJsZSBmb3IgZXZlcnlkYXkgY29uc3VtcHRpb247IG9mdGVuIGdvb2QgdmFsdWUuICAgfAp8QWNjZXB0YWJsZXwJODAtODIJIHwgQ2FuIGJlIGVtcGxveWVkIGluIGNhc3VhbCwgbGVzcy1jcml0aWNhbCBjaXJjdW1zdGFuY2VzIHwKCmBgYHtyfQojIGZ1bmN0aW9uIHRvIGFkZCByYXRpbmcKcmF0aW5nX2NhdGVnb3J5IDwtIGZ1bmN0aW9uKHBvaW50cyl7CiAgaWYocG9pbnRzPj05OCl7CiAgICByZXR1cm4oIkNsYXNzaWMiKQogIH0KICBlbHNlIGlmIChwb2ludHM+PTk0KXsKICAgIHJldHVybigiU3VwZXJiIikKICB9CiAgZWxzZSBpZihwb2ludHM+PTkwKXsKICAgIHJldHVybigiRXhjZWxsZW50IikKICB9CiAgZWxzZSBpZihwb2ludHM+PTg3KXsKICAgIHJldHVybigiVmVyeSBHb29kIikKICB9CiAgZWxzZSBpZihwb2ludHM+PTgzKXsKICAgIHJldHVybigiR29vZCIpCiAgfQogIGVsc2V7CiAgICByZXR1cm4oIkFjY2VwdGFibGUiKQogIH0KfQoKd2luZXM8LSB3aW5lcyAlPiUKICByb3d3aXNlKCkgJT4lCiAgbXV0YXRlKHJhdGluZ19jYXRlZ29yeSA9IHJhdGluZ19jYXRlZ29yeShwb2ludHMpKQpoZWFkKHdpbmVzKQpgYGAKCiMgRXhwbG9yZSB0aGUgRGF0YQoKRURBIChjb3JyZWxhdGlvbiBwcmljZVhwb2ludHMsIHdpdGggYGBgRGF0YUV4cGxvcmVyYGBgIGxpYnJhcnk/IHVzaW5nICh0aGlzKVtodHRwczovL2RhdGFzY2llbmNlcGx1cy5jb20vYmxhemluZy1mYXN0LWVkYS1pbi1yLXdpdGgtZGF0YWV4cGxvcmVyL10pCmBgYHtyfQp3aW5lcyAlPiUgCiAgZ2dwbG90KCkgKwogICAgZ2VvbV9wb2ludChtYXBwaW5nID0gKGFlcyh4ID0gcG9pbnRzLCB5ID0gcHJpY2UpKSwgbmEucm0gPSBUKQpgYGAKCmBgYHtyfQp3aW5lcyAlPiUKICAgIHN1bW1hcml6ZShhdmdfcHJpY2UgPSBtZWFuKHByaWNlLCBuYS5ybT1UUlVFKSwgCiAgICAgICAgICAgICAgc2RfcHJpY2UgPSBzZChwcmljZSwgbmEucm09VFJVRSksCiAgICAgICAgICAgICAgbG93ZXN0X3ByaWNlID0gbWluKHByaWNlLCBuYS5ybT1UUlVFKSwKICAgICAgICAgICAgICBoaWdoZXN0X3ByaWNlID0gbWF4KHByaWNlLG5hLnJtPVRSVUUpKQpgYGAKCmBgYHtyfQp3aW5lcyAlPiUKICAgIHN1bW1hcml6ZShhdmdfcG9pbnRzID0gbWVhbihwb2ludHMsIG5hLnJtPVRSVUUpLCAKICAgICAgICAgICAgICBzZF9wb2ludHMgPSBzZChwb2ludHMsIG5hLnJtPVRSVUUpLAogICAgICAgICAgICAgIGxvd2VzdF9wb2ludHMgPSBtaW4ocG9pbnRzLCBuYS5ybT1UUlVFKSwKICAgICAgICAgICAgICBoaWdoZXN0X3BvaW50cyA9IG1heChwb2ludHMsbmEucm09VFJVRSkpCmBgYAoKU2VsZWN0IHRoZSBwcm92aW5jZXMgYmFzZWQgb24gcG9pbnRzICBhbmQgU2VsZWN0IHRoZSBiZXN0IHByb3ZpbmNlIGZvciB3aW5lIGJhc2VkIG9uIHRoZSBhdmVyYWdlIHBvaW50cyBvZiB0aGUgc2FtcGxlIHNpemUuIAoKI2ZpbmQgdGhlIGF2ZXJhZ2UgbnVtYmVyIG9mIHBvaW50cyBhY3Jvc3MgdGhlIDEsMDAwIHNhbXBsZXMKYGBge3J9CndpbmVfcGVyX3Byb3ZpbmNlIDwtIHdpbmUgJT4lIAogIHNlbGVjdChwcm92aW5jZSwgcG9pbnRzKSAlPiUgCiAgc3VtbWFyaXNlKHBvaW50cyA9IG1lYW4ocG9pbnRzKSkKd2luZV9wZXJfcHJvdmluY2UKYGBgCgoKI0ZpbmQgdGhlIGJlc3QgcHJvdmluY2UgZm9yIHdpbmUgdXNpbmcgdGhlIGF2ZXJhZ2UgcG9pbnRzIGFjcm9zcyB0aGUgMSwwMDAgc2FtcGxlcwojZHJvcCB0aGUgZGVzY3JpcHRpb25zIG9yIGp1c3Qgc2VsZWN0IHByaWNlPyBzZXQgcG9pbnRzIHRvIG1heChwb2ludHMpCmBgYHtyfQpiZXN0X3Byb3ZpbmNlIDwtIHdpbmVfc2FtcGxlICU+JSAKICBncm91cF9ieShwcm92aW5jZSwgcG9pbnRzKSAlPiUgCiAgZmlsdGVyKHBvaW50cyA+IDg4LjY2OSkKYmVzdF9wcm92aW5jZSAgCmBgYAoKClJhdGluZyBkaXN0cmlidXRpb24KCmBgYHtyfQoKYGBgCgpCZXN0IHdpbmUsIGJ5IHZhcmlldHkKYGBge3J9CiN3aW5lX2Jlc3RfdmFyaWV0eSA8LSAKd2luZXMgJT4lIAogIGdyb3VwX2J5KHZhcmlldHkpICU+JSAKICBzdW1tYXJpc2UobWVhbl9wb2ludHMgPSBtZWFuKHBvaW50cykpICU+JSAKICBhcnJhbmdlKGRlc2MobWVhbl9wb2ludHMpKSAKICAKYGBgCgpgYGB7cn0KdXNlcl9wcmljZSA8LSByZWFkbGluZShwcm9tcHQgPSAiSG93IG11Y2ggYXJlIHlvdSB3aWxsaW5nIHRvIHNwZW5kIG9uIGEgYm90dGxlPyIpCnVzZXJfcHJpY2UgPC0gYXMuaW50ZWdlcih1c2VyX3ByaWNlKQoKd2luZXMgJT4lIAogIGZpbHRlcihwcmljZSA8PSB1c2VyX3ByaWNlKSAlPiUgCiAgYXJyYW5nZShkZXNjKHBvaW50cykpICU+JSAKICBzZWxlY3QodGl0bGUsIHByaWNlLCBwb2ludHMpCmBgYAoKCiMgQ29uY2x1c2lvbgo=